start_time <- Sys.time()
#### #### Load Objects & Functions #### ####
######################################################
# Import functions
root = "./"
source(file.path(root,"MAIN.R"))
import_parameters(params)## **** __Utilized Cores__ **** = 2$subsetGenes
## [1] "protein_coding"
##
## $subsetCells
## [1] 500
##
## $resolution
## [1] 0.6
##
## $resultsPath
## [1] "./Results"
##
## $nCores
## [1] 2
##
## $perplexity
## [1] 30
# load("Results/Current_Pipeline/scRNAseq_results.RData")
# load(file.path(resultsPath, "3-11-2019/scRNAseq_results.RData"))
load(file.path("Data", "monocle3_CDS.RData"))
######################################################
#### #### PACKAGES #### ####
######################################################
print("Written using: Seurat version* 2.3.4 2018-07-17")## [1] "Written using: Seurat version* 2.3.4 2018-07-17"
# https://satijalab.org/seurat/install.html
# source("http://bit.ly/archived-seurat")
library(Seurat)
paste("Seurat", packageVersion("Seurat")) ## [1] "Seurat 3.1.0"
# library(monocle) # BiocManager::install("monocle")
# paste("monocle", packageVersion("monocle"))
library(monocle3)
paste("monocle3", packageVersion("monocle3")) ## [1] "monocle3 0.1.2"
library(garnett) # devtools::install_github("cole-trapnell-lab/garnett")
paste("garnett", packageVersion("garnett"))## [1] "garnett 0.2.3"
library(cowplot)
library(ggplot2)
library(dplyr)
library(data.table)
library(readxl)
library(reshape2)
library(ggrepel)
library(plotly)
######################################################
# Exporting 3D plots
knitr::knit_hooks$set(webgl = rgl::hook_webgl)
dge_limit <- F # 100
nCores <- 4#parallel::detectCores()
set.seed(2019)Also subset to only protein-coding genes.
cds <- monocle3::preprocess_cds(cds,
num_dim = 20, #100 by default
residual_model_formula_str = "~ nUMI + percent.mito")
monocle3::plot_pc_variance_explained(cds)Using UMAP.
In addition to clusters this function calculates partitions, which represent superclusters of the Louvain communities that are found using a kNN pruning method. Cluster assignments can be accessed using the clusters function and partition assignments can be accessed using the partitions function.
Using only the topN variable genes to cluster
# Method 1
# variable.genes <- cds@preprocess_aux@listData$gene_loadings[,1:3] %>%
# abs() %>% rowSums() %>% base::sort(decreasing = T)
# head(variable.genes)
varDAT <- Seurat::FindVariableGenes(object = protDAT,
mean.function = ExpMean,
dispersion.function = LogVMR,
selection.method ="dispersion", do.plot = T,
top.genes = 2000)## Error: 'FindVariableGenes' is not an exported object from 'namespace:Seurat'
## Error in eval(expr, envir, enclos): object 'varDAT' not found
## [1] "TMSB4X" "FTL" "S100A8" "FTH1" "B2M" "S100A9"
cds <- monocle3::cluster_cells(cds,
resolution = 1e-4,#c(10^seq(-6,-1)), # Do NOT set to high number
reduction_method = "UMAP",
cores = nCores,
clustering_genes = var.genes)
# Add cluster info to metadata (for easier DGE analysis)
pData(cds)$Cluster <- monocle3::clusters(cds)
pData(cds)$Partition <- monocle3::partitions(cds)
# 3D plot
p3d <- plot_cells_3d(cds, color_cells_by = "cluster", show_trajectory_graph = F)
p3d##
|
| | 0%
|
|=================================================================| 100%
## Warning in louvain_clustering(data, pd[row.names(data), ], k = k, weight = weight, : RANN counts the point itself, k must be smaller than
## the total number of points - 1 (all other points) - 1 (itself)!
# Order cells
root_pr_nodes = get_earliest_principal_node(cds, variable = "dx", variable_value = "PD")
cds <- monocle3::order_cells(cds, root_pr_nodes = root_pr_nodes)
monocle3::plot_cells(cds)## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
Disease status (dx), mutation status (mut), and individual ID (ID) show good mixture across clusters.
for(v in c("dx","mut","ID")){
cat("\n")
cat("####",v,"\n")
p <- monocle3::plot_cells(cds, color_cells_by=v, label_cell_groups=F, show_trajectory_graph = F)
print(p)
cat("\n")
} Use a pre-trained classifier from Pline et al.
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
##
## [1] "ACCNUM" "ALIAS" "ENSEMBL" "ENSEMBLPROT"
## [5] "ENSEMBLTRANS" "ENTREZID" "ENZYME" "EVIDENCE"
## [9] "EVIDENCEALL" "GENENAME" "GO" "GOALL"
## [13] "IPI" "MAP" "OMIM" "ONTOLOGY"
## [17] "ONTOLOGYALL" "PATH" "PFAM" "PMID"
## [21] "PROSITE" "REFSEQ" "SYMBOL" "UCSCKG"
## [25] "UNIGENE" "UNIPROT"
# load(url("https://cole-trapnell-lab.github.io/garnett/classifiers/hsPBMC"))
load("./Data/Garnett/hsPBMC")
cds = garnett::classify_cells(cds,
hsPBMC,
db = org.Hs.eg.db,
cluster_extend = T,
cds_gene_id_type = "SYMBOL")## Warning in doTryCatch(return(expr), name, parentenv, handler): The
## following genes used in the classifier are not present in the input CDS.
## Interpret with caution. ENSG00000174059The following genes used in the
## classifier are not present in the input CDS. Interpret with caution.
## ENSG00000007062The following genes used in the classifier are not present
## in the input CDS. Interpret with caution. ENSG00000157404The following
## genes used in the classifier are not present in the input CDS. Interpret
## with caution. ENSG00000185291
monocle3::plot_cells(cds,
group_cells_by="cluster",
color_cells_by="cluster_ext_type", # cell_type
show_trajectory_graph = F )## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 26788 rows containing non-finite values (stat_ydensity).
## Warning: Removed 26788 rows containing non-finite values (stat_summary).
meta <- data.frame(pData(cds))
# By dx
cell_proportions <- meta %>%
group_by(dx, Cluster) %>%
summarise(Proportion=n()) %>%
group_by(dx) %>%
mutate(Proportion=Proportion/sum(Proportion))
createDT(cell_proportions)prop.dx <- ggplot(cell_proportions, aes(x=dx, y=Proportion, fill = Cluster)) +
geom_col() + # position="dodge"
ylab("Proportion of Cells")
print(prop.dx)clust.key <- list("1"="Canonical Monocytes", "2"="Intermediate Monocytes")
for(clust in 1:2){
clust1.diff <- (subset(cell_proportions, Cluster==clust & dx=="PD")$Proportion - subset(cell_proportions, Cluster==clust & dx=="control")$Proportion)*100
print(paste("There is a",round(clust1.diff, 2),"% difference in the number of Cluster",clust,"cells (",clust.key[[as.character(clust)]], ") in Controls compared to PD."))
} ## [1] "There is a 5.22 % difference in the number of Cluster 1 cells ( Canonical Monocytes ) in Controls compared to PD."
## [1] "There is a -4.67 % difference in the number of Cluster 2 cells ( Intermediate Monocytes ) in Controls compared to PD."
# By mut
cell_proportions <- meta %>%
group_by(mut, Cluster) %>%
summarise(Proportion=n()) %>%
group_by(mut) %>%
mutate(Proportion=Proportion/sum(Proportion))
createDT(cell_proportions)prop.mut <- ggplot(cell_proportions, aes(x=mut, y=Proportion, fill = Cluster)) +
geom_col() + # position="dodge"
ylab("Proportion of Cells")
print(prop.mut) # By mut
cell_proportions <- meta %>%
group_by(dx, Cluster, ID) %>%
summarise(Proportion=n()) %>%
group_by(ID) %>%
mutate(Proportion=Proportion/sum(Proportion))
createDT(cell_proportions)prop.ID <- ggplot(cell_proportions, aes(x=ID, y=Proportion, fill = Cluster)) +
geom_col() + # position="dodge"
ylab("Proportion of Cells") +
facet_grid("~dx") +
theme(axis.text.x = element_text(angle = 45))
print(prop.ID) Save R object and run memory-intensive DGE analyses on computing cluster.